acc_pri = subset(hmsc_acc_pri_cancer,subset = patient.ident != "HMSC")
Warning: Keys should be one or more alphanumeric characters followed by an underscore, setting key from pca_integrated_ to pcaintegrated_
Warning: Keys should be one or more alphanumeric characters followed by an underscore, setting key from umap_integrated_ to umapintegrated_
DimPlot(hmsc_acc_pri_cancer,group.by = "patient.ident",label = T)
FeaturePlot(object = hmsc_acc_pri_cancer,features = c("MYB"),pt.size = 1)
FeaturePlot(object = hmsc_acc_pri_cancer,features = c("kaye_acc_score"),pt.size = 1)
pdf("./Figures/kaye_acc_score_AllCancerCells.pdf")
FeaturePlot(object = all_acc_cancer_cells,features = c("kaye_acc_score"),pt.size = 1)
dev.off()
null device
1
DefaultAssay(hmsc_cancer_cells) = "RNA"
hallmark_name = "GO_MITOTIC_CELL_CYCLE"
acc_pri = ScaleData(object = acc_pri,features = VariableFeatures(acc_pri,assay = "RNA"))
Centering and scaling data matrix
|
| | 0%
|
|======= | 7%
|
|============== | 13%
|
|===================== | 20%
|
|============================ | 27%
|
|=================================== | 33%
|
|========================================== | 40%
|
|================================================= | 47%
|
|========================================================= | 53%
|
|================================================================ | 60%
|
|======================================================================= | 67%
|
|============================================================================== | 73%
|
|===================================================================================== | 80%
|
|============================================================================================ | 87%
|
|=================================================================================================== | 93%
|
|==========================================================================================================| 100%
geneIds= genesets_h[[hallmark_name]]@geneIds %>% intersect(VariableFeatures(acc_pri,assay = "RNA"))
score <- apply(acc_pri@assays$RNA@scale.data[geneIds,],2,mean)
acc_pri=AddMetaData(acc_pri,score,hallmark_name)
hmsc_cancer_cells = FindVariableFeatures(object = hmsc_cancer_cells,nfeatures = 15000)
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
hmsc_cancer_cells = ScaleData(object = hmsc_cancer_cells,features = VariableFeatures(hmsc_cancer_cells))
Centering and scaling data matrix
|
| | 0%
|
|======= | 7%
|
|============== | 13%
|
|===================== | 20%
|
|============================ | 27%
|
|=================================== | 33%
|
|========================================== | 40%
|
|================================================= | 47%
|
|========================================================= | 53%
|
|================================================================ | 60%
|
|======================================================================= | 67%
|
|============================================================================== | 73%
|
|===================================================================================== | 80%
|
|============================================================================================ | 87%
|
|=================================================================================================== | 93%
|
|==========================================================================================================| 100%
geneIds= genesets_h[[hallmark_name]]@geneIds %>% intersect(VariableFeatures(hmsc_cancer_cells))
score <- apply(hmsc_cancer_cells@assays$RNA@scale.data[geneIds,],2,mean)
hmsc_cancer_cells=AddMetaData(hmsc_cancer_cells,score,hallmark_name)
acc_cc_scores = FetchData(object = acc_pri,vars = "GO_MITOTIC_CELL_CYCLE")
hmsc_cc_scores = FetchData(object = hmsc_cancer_cells,vars = "GO_MITOTIC_CELL_CYCLE")
distributions_plt = ggplot() +
geom_density(aes(GO_MITOTIC_CELL_CYCLE, fill = "ACC"), alpha = .2, data = acc_cc_scores) +
geom_density(aes(GO_MITOTIC_CELL_CYCLE, fill = "HMSC"), alpha = .2, data = hmsc_cc_scores) +
scale_fill_manual(name = "Dataset", values = c(ACC = "red", HMSC = "green"))+ geom_vline(aes(xintercept=0.3),
color="blue", linetype="dashed", size=1) +ggtitle("'GO_MITOTIC_CELL_CYCLE' score distribution")
print_tab(plt = distributions_plt,title = "score distribution",subtitle_num = 3)
NA
hmsc_cc_cells = (sum(hmsc_cancer_cells@meta.data[[hallmark_name]]> 0.3) /ncol(hmsc_cancer_cells)) %>% round(digits = 3)*100
acc_cc_cells = (sum(acc_pri@meta.data[[hallmark_name]]> 0.3)/ncol(acc_pri)) %>% round(digits = 3)*100
df = data.frame(Dataset = c("HMSC","ACC"), cycling_cells_percentage = c(hmsc_cc_cells,acc_cc_cells))
cycling_cells_plt = ggplot(data=df, aes(x=Dataset, y=cycling_cells_percentage)) +
geom_text(aes(label=cycling_cells_percentage), vjust=0, color="black", size=3.5)+
geom_bar(stat="identity")+ylab(" % cycling cells")+
geom_bar(stat="identity", fill="steelblue")+
theme_minimal() + ggtitle("Cycling cells count")
print_tab(plt = cycling_cells_plt,title = "# cycling cells",subtitle_num = 3)
NA
pdf(file = "./Figures/CC_distributions.pdf")
distributions_plt
dev.off()
null device
1
pdf(file = "./Figures/cycling_cells.pdf")
cycling_cells_plt
dev.off()
null device
1
print_tab(plt =
FeaturePlot(hmsc_acc_pri_cancer,features = c("MKI67","CDK1","MCM2","CDC20"))
,title = "CC genes",subtitle_num = 3)
NA
#add score to all acc cancer cells
# geneIds= genesets[[hallmark_name]]@geneIds %>% intersect(VariableFeatures(all_acc_cancer_cells,assay = "integrated"))
# score <- apply(all_acc_cancer_cells@assays$integrated@scale.data[geneIds,],2,mean)
#add score to all acc cancer cells
cc_all = c(acc_pri$GO_MITOTIC_CELL_CYCLE, hmsc_cancer_cells$GO_MITOTIC_CELL_CYCLE) %>% as.data.frame()
hmsc_acc_pri_cancer=AddMetaData(hmsc_acc_pri_cancer,cc_all,hallmark_name)
#filter:
all_acc_cancer_cells_ccFiltered=hmsc_acc_pri_cancer[,hmsc_acc_pri_cancer@meta.data[[hallmark_name]]< 0.3]
Warning: Keys should be one or more alphanumeric characters followed by an underscore, setting key from pca_integrated_ to pcaintegrated_
Warning: Keys should be one or more alphanumeric characters followed by an underscore, setting key from umap_integrated_ to umapintegrated_
min_threshold = min(hmsc_acc_pri_cancer$GO_MITOTIC_CELL_CYCLE)
max_threshold = max(hmsc_acc_pri_cancer$GO_MITOTIC_CELL_CYCLE)
library(viridis)
print_tab(plt = FeaturePlot(object = hmsc_acc_pri_cancer,features = hallmark_name) + ggtitle("Before cc filtering") &
scale_color_gradientn(colours = plasma(n = 10, direction = -1), limits = c(min_threshold, max_threshold))
,title = "Before CC filtering",subtitle_num = 3)
Scale for ‘colour’ is already present. Adding another scale for ‘colour’, which will replace the existing scale.
print_tab(plt =
FeaturePlot(object = all_acc_cancer_cells_ccFiltered,features = hallmark_name) + ggtitle("After cc filtering") &
scale_color_gradientn(colours = plasma(n = 10, direction = -1), limits = c(min_threshold, max_threshold))
,title = "After CC filtering" ,subtitle_num = 3)
Scale for ‘colour’ is already present. Adding another scale for ‘colour’, which will replace the existing scale.
NA
all_acc_cancer_cells_ccFiltered = SetIdent(all_acc_cancer_cells_ccFiltered, value ="patient.ident")
acc_deg <-
FindMarkers(
all_acc_cancer_cells_ccFiltered,
ident.1 = "HMSC",
features = VariableFeatures(all_acc_cancer_cells_ccFiltered),
densify = T,
verbose = T,
slot = "data",
mean.fxn = function(x) {
return(log(rowMeans(x) + 1,base = 2)) # change func to calculate logFC in log space data (default to exponent data)
},
assay = "RNA"
)
library(hypeR)
genesets <- msigdb_download("Homo sapiens",category="H") %>% append( msigdb_download("Homo sapiens",category="C2",subcategory = "CP:KEGG"))
ranked_vec = acc_deg[,"avg_log2FC"]%>% setNames(rownames(acc_deg)) %>% na.omit() # make named vector
hyp_obj <-hypeR_fgsea(signature = ranked_vec,genesets = geneIds(genesets_h),up_only = F)
plt = hyp_dots(hyp_obj,merge = F)
plt1 = plt$up+ aes(size=nes)+ggtitle("up in HMSC")
plt2 = plt$dn+ aes(size=abs(nes))+ggtitle("up in ACC")
plt1+plt2
pdf(file = "./Figures/ACC_vs_HMSC_GSEA.pdf",width = 13,height = 6)
plt3
dev.off()
null device
1
volcano_plot(de_genes = acc_deg,fdr_cutoff = 0.05,fc_cutoff = 2, ident1 = "HMSC",ident2 = "ACC",top_genes_text = 4)
pdf("./Figures/volcano_plot_ACC_VS_HMSC.pdf")
volcano_plot(de_genes = acc_deg,fdr_cutoff = 0.05,fc_cutoff = 2, ident1 = "HMSC",ident2 = "ACC",top_genes_text = 4)
dev.off()
null device
1
pdf("./Figures/Enrichment_analysis_ACC_VS_HMSC.pdf")
enrichment_analysis(acc_deg,background = VariableFeatures(all_acc_cancer_cells_ccFiltered),fdr_Cutoff = 0.01,ident.1 =
"HMSC",ident.2 = "ACC",show_by = 1)
dev.off()
null device
1
top_hmsc_genes = acc_deg %>% dplyr::filter(avg_log2FC > 0) %>% slice_min(n = 10,order_by = p_val_adj) %>% rownames()
top_acc_genes = acc_deg %>% dplyr::filter(avg_log2FC < 0) %>% slice_min(n = 10,order_by = p_val_adj) %>% rownames()
all_top_deg = c(top_hmsc_genes,top_acc_genes)
all_acc_cancer_cells_ccFiltered$cancer_type = all_acc_cancer_cells_ccFiltered$patient.ident %>% gsub(pattern = "ACC.*",replacement = "ACC")
cancer_type = FetchData(object = all_acc_cancer_cells_ccFiltered, vars = "cancer_type")
# col_list = list(circlize::colorRamp2(c(0, 1), c("white", "red"))); names(col_list) = colnames(all_metagenes)[i]
column_ha = HeatmapAnnotation(df = cancer_type)
data = FetchData(object = all_acc_cancer_cells_ccFiltered,vars = all_top_deg,slot = "scale.data") %>% t()
print(ComplexHeatmap::Heatmap(data,show_column_names = F,row_names_gp = grid::gpar(fontsize = 7),cluster_rows = F, ,name = "Z-score expression",cluster_columns = F,top_annotation = column_ha))
NA
NA
# create expression matrix of acc + normal cells + HMSC seurat integrated
# acc_all_cells_noAcc1 = subset(acc_all_cells, subset = patient.ident != "ACC1")
# acc_expr = acc_all_cells_noAcc1@assays$RNA@data %>% as.data.frame()
# hmsc_expr = acc.combined@assays$integrated@data %>% as.data.frame()
# acc_expr = acc_expr [ rownames(hmsc_expr),]
# all_expr = cbind(acc_expr,hmsc_expr)
#
# # create annotation
# acc_annotation_integrated = as.data.frame(acc_all_cells@meta.data[,"cell.type",drop = F])
# acc_annotation_integrated = acc_annotation_integrated[colnames(all_expr),,drop = F]
# acc_annotation_integrated = acc_annotation_integrated %>% rownames_to_column("orig.ident")
# #write expression and annotation
# write.table(acc_annotation_integrated, "./Data/inferCNV/acc_annotation_integrated.txt", append = FALSE,
# sep = "\t", dec = ".",row.names = FALSE, col.names = F)
#
#
# write.table(all_expr, "./Data/inferCNV/all.4icnv_integrated.txt", append = FALSE,
# sep = "\t", dec = ".",row.names = T, col.names = T)
trace(infercnv::run,edit = T) # to skip normalization, change to skip_past = 4 (https://github.com/broadinstitute/infercnv/issues/346)
Tracing function "run" in package "infercnv"
[1] "run"
infercnv_obj = CreateInfercnvObject(raw_counts_matrix="./Data/inferCNV/all.4icnv_integrated.txt",
annotations_file="./Data/inferCNV/acc_annotation_integrated.txt",
delim="\t",gene_order_file="./Data/inferCNV/gencode_v19_gene_pos.txt"
,ref_group_names=c("CAF", "Endothelial", "WBC")) #groups of normal cells
infercnv_obj_default = infercnv::run(infercnv_obj, cutoff=1, out_dir='./Data/inferCNV/infercnv_intergrated_output',
cluster_by_groups=T, plot_steps=FALSE,
denoise=TRUE, HMM=FALSE, no_prelim_plot=TRUE,
png_res=300)
untrace(infercnv::run)
trace(infercnv:::get_group_color_palette ,edit = T) # change "Set3" to "Set1" for more distinguishable colors
plot_cnv(infercnv_obj_default, output_format = "png", write_expr_matrix = FALSE,out_dir = "./Data/inferCNV/infercnv_intergrated_output",png_res =800,obs_title = "Malignant cells",ref_title = "Normal cells",contig_cex = 2, title = "Copy number variation")
untrace(infercnv:::get_group_color_palette)
print_tab(plt = knitr::include_graphics("./Data/inferCNV/infercnv_intergrated_output/infercnv.png")
,title = "CNV plot",subtitle_num = 3)
NA
library(limma)
smoothed=apply(infercnv_obj_default@expr.data,2,tricubeMovingAverage, span=0.01)
cnsig=sqrt(apply((smoothed-1)^2,2,mean))
acc_all_cells <- AddMetaData(object = acc_all_cells, metadata = cnsig, col.name = "copynumber")
acc_all_cells = SetIdent(object = acc_all_cells,value = "cell.type")
print_tab(plt = FeaturePlot(acc_all_cells, "copynumber",pt.size = 1,label = T,repel = T)+
scale_colour_gradientn(colours=c("white","lightblue","orange","red","darkred"))
,title = "CNV UMAP",subtitle_num = 3)
Scale for ‘colour’ is already present. Adding another scale for ‘colour’, which will replace the existing scale.
NA
hmsc_cancer_cells = FindClusters(object = hmsc_cancer_cells,verbose = F,resolution = 0.5)
DimPlot(object = hmsc_cancer_cells,pt.size = 2)
FeaturePlot(object = hmsc_cancer_cells,features = c("MYB","JAG1"),pt.size = 2)+
DimPlot(object = hmsc_cancer_cells,group.by = c("hpv33_positive"),pt.size = 2)
reticulate::repl_python()
from cnmf import cNMF
import pickle
nfeatures = "2K"
f = open('./Data/cNMF/HMSC_cNMF_harmony_2Kvargenes/cnmf_obj.pckl', 'rb')
cnmf_obj = pickle.load(f)
f.close()
quit
knitr::include_graphics("./Data/cNMF/HMSC_cNMF_harmony_2Kvargenes/HMSC_cNMF_harmony_2Kvargenes.k_selection.png")
reticulate::repl_python()
selected_k = 3
density_threshold = 0.1
cnmf_obj.consensus(k=selected_k, density_threshold=density_threshold,show_clustering=True)
usage_norm, gep_scores, gep_tpm, topgenes = cnmf_obj.load_results(K=selected_k, density_threshold=density_threshold)
quit
gep_scores = py$gep_scores
gep_tpm = py$gep_tpm
all_metagenes= py$usage_norm
# Make metagene names
for (i in 1:ncol(all_metagenes)) {
colnames(all_metagenes)[i] = "metagene." %>% paste0(i)
}
#add each metagene to metadata
for (i in 1:ncol(all_metagenes)) {
metage_metadata = all_metagenes %>% select(i)
acc1_cancer_cells = AddMetaData(object = acc1_cancer_cells,metadata = metage_metadata)
}
Note: Using an external vector in selections is ambiguous. ℹ Use
all_of(i) instead of i to silence this
message. ℹ See https://tidyselect.r-lib.org/reference/faq-external-vector.html.
This message is displayed once per session.
print_tab(plt =
FeaturePlot(object = acc1_cancer_cells,features = colnames(all_metagenes),combine = T),
title = "metagenes expression",subtitle_num = toc_tabs_level)
NA
canonical_pathways = msigdbr(species = "Homo sapiens", category = "C2") %>% dplyr::filter(gs_subcat != "CGP") %>% dplyr::distinct(gs_name, gene_symbol)
plt_list = list()
for (i in 1:ncol(gep_scores)) {
top_genes = gep_scores %>% arrange(desc(gep_scores[i])) #sort by score a
top = head(rownames(top_genes),200) #take top top_genes_num
res = genes_vec_enrichment(genes = top,background = rownames(gep_scores),homer = T,title =
i,silent = T,return_all = T,custom_pathways = canonical_pathways)
plt_list[[i]] = res$plt
}
gridExtra::grid.arrange(grobs = plt_list)
for (i in 1:ncol(gep_scores)) {
ranked_vec = gep_scores %>% pull(i) %>% setNames(rownames(gep_scores))
hyp_obj <-hypeR_fgsea(signature = ranked_vec,genesets = genesets,up_only = T)
plt = hyp_dots(hyp_obj,merge = F)+ aes(size=abs(nes))
print(plt)
}
library(ComplexHeatmap)
acc1_cancer_cells = SetIdent(object = acc1_cancer_cells,value = "seurat_clusters")
for (i in 1:ncol(gep_scores)) {
top_genes = gep_scores %>% arrange(desc(gep_scores[i])) #sort by score a
top = head(rownames(top_genes),50) #take top top_genes_num
data = FetchData(object = acc1_cancer_cells,vars = top)%>% scale() %>% t()
metagene_data = FetchData(object = acc1_cancer_cells,vars = colnames(all_metagenes)[i])
col_list = list(circlize::colorRamp2(c(0, 1), c("white", "red"))); names(col_list) = colnames(all_metagenes)[i]
column_ha = HeatmapAnnotation(df = metagene_data,col = col_list)
print(ComplexHeatmap::Heatmap(data,show_column_names = F,row_names_gp = grid::gpar(fontsize = 7),cluster_rows = F, top_annotation =
column_ha,name = "Z-score expression"))
pdf(paste0("./Figures/NMF_top_genes_program",i,".pdf"))
print(ComplexHeatmap::Heatmap(data,show_column_names = F,row_names_gp = grid::gpar(fontsize = 7),cluster_rows = F, top_annotation =
column_ha,name = "Z-score expression"))
dev.off()
}
original_myo_genes = c( "TP63", "TP73", "CAV1", "CDH3", "KRT5", "KRT14", "ACTA2", "TAGLN", "MYLK", "DKK3")
original_lum_genes = c("KIT", "EHF", "ELF5", "KRT7", "CLDN3", "CLDN4", "CD24", "LGALS3", "LCN2", "SLPI" )
FeaturePlot(hmsc_cancer_cells,features = original_myo_genes)
FeaturePlot(hmsc_cancer_cells,features = original_lum_genes)
acc_cancerCells_noACC1 = SetIdent(acc_cancerCells_noACC1,value = "patient.ident")
calculate_score(dataset = acc_cancerCells_noACC1,myo_genes = original_myo_genes,lum_genes = original_lum_genes)
correlation of lum score and myo score: -0.51
correlation of lum score and original lum score: 1
correlation of myo score and original myo score: 1
calculate_score.2 <- function(dataset,myo_genes,lum_genes,lum_threshold =1 , myo_threshold = -1) {
myoscore=FetchData(object =dataset,vars = myo_genes,slot = "data") %>% rowMeans()
lescore=FetchData(object =dataset,vars = lum_genes,slot = "data") %>% rowMeans()
correlation = cor(lescore,myoscore) %>% round(digits = 2)
message("correlation of lum score and myo score:" %>% paste(correlation))
original_myo_genes = c("TP63", "TP73", "CAV1", "CDH3", "KRT5", "KRT14", "ACTA2", "TAGLN", "MYLK", "DKK3")
original_lum_genes = c("KIT", "EHF", "ELF5", "KRT7", "CLDN3", "CLDN4", "CD24", "LGALS3", "LCN2", "SLPI")
orig_myoscore=FetchData(object =dataset,vars = original_myo_genes,slot = "data") %>% rowMeans()
orig_lescore=FetchData(object =dataset,vars = original_lum_genes,slot = "data") %>% rowMeans()
correlation_to_original_lum = cor(orig_lescore,lescore) %>% round(digits = 2)
correlation_to_original_myo = cor(orig_myoscore,myoscore) %>% round(digits = 2)
message("correlation of lum score and original lum score:" %>% paste(correlation_to_original_lum))
message("correlation of myo score and original myo score:" %>% paste(correlation_to_original_myo))
dataset=AddMetaData(dataset,lescore-myoscore,"luminal_over_myo")
print(
FeaturePlot(object = dataset,features = "luminal_over_myo")
)
data = FetchData(object = dataset,vars = "luminal_over_myo")
print(
data %>%
ggplot(aes( x=luminal_over_myo)) +
geom_density() +ylab("Density")+theme( axis.title=element_text(size=12,face="bold"))+ xlab("Luminal-Myoepithelial spectrum")
)
lum_cells_num = subset(x = dataset,luminal_over_myo >(lum_threshold)) %>% ncol() /ncol(dataset)
myo_cells_num = subset(x = dataset,luminal_over_myo <(myo_threshold)) %>% ncol()/ncol(dataset)
df = data.frame(cell_type = c("myo_cells","lum_cells"),percentage = c(myo_cells_num,lum_cells_num))
ggplot(data=df, aes(x=cell_type, y=percentage)) +
geom_bar(stat="identity") + ggtitle("ACC cell types")
}
calculate_score.2(dataset = acc_pri,myo_genes = original_myo_genes,lum_genes = original_lum_genes,lum_threshold = 0,myo_threshold = 0)
correlation of lum score and myo score: -0.51
correlation of lum score and original lum score: 1
correlation of myo score and original myo score: 1
calculate_score(dataset = hmsc_cancer_cells,myo_genes = original_myo_genes,lum_genes = original_lum_genes,lum_threshold = 0,myo_threshold = 0)
correlation of lum score and myo score: -0.08
correlation of lum score and original lum score: 1
correlation of myo score and original myo score: 1
data = FetchData(object = hmsc_cancer_cells,vars = c(original_lum_genes))
a = cor(data)
ComplexHeatmap::Heatmap(matrix = cor(data),name = "pearson")
HPV33_P3 = fread("./Data/HPV33_P3.txt",col.names = c("plate","reads")) %>% as.data.frame()
HPV33_P3.df = HPV33_P3 %>% mutate(
plate = gsub(x =HPV33_P3$plate, replacement = "",pattern = "_.*$")
%>% gsub(pattern = "-P",replacement = ".P")
%>% gsub(pattern = "-",replacement = "_",)
)
HPV33_P3.df = HPV33_P3.df %>% dplyr::filter(HPV33_P3.df$plate %in% colnames(hmsc_cancer_cells))
rownames(HPV33_P3.df) <- HPV33_P3.df$plate
HPV33_P3.df$plate = NULL
HPV33_P2 = fread("./Data/HPV33_P2.txt",col.names = c("plate","reads")) %>% as.data.frame()
HPV33_P2.df = HPV33_P2 %>% mutate(
plate = gsub(x =HPV33_P2$plate, replacement = "",pattern = "_.*$")
%>% gsub(pattern = "plate2-",replacement = "plate2_",)
%>% gsub(pattern = "-",replacement = "\\.",)
)
HPV33_P2.df = HPV33_P2.df %>% dplyr::filter(HPV33_P2.df$plate %in% colnames(hmsc_cancer_cells))
rownames(HPV33_P2.df) <- HPV33_P2.df$plate
HPV33_P2.df$plate = NULL
HPV33 = rbind(HPV33_P3.df,HPV33_P2.df)
hmsc_cancer_cells = AddMetaData(object = hmsc_cancer_cells,metadata = HPV33,col.name = "HPV33.reads")
FeaturePlot(hmsc_cancer_cells,features = "HPV33.reads",max.cutoff = 10)
data = FetchData(object = hmsc_cancer_cells,vars = "HPV33.reads")
data = data %>% mutate("0 reads" = if_else(condition = HPV33.reads == 0,true = 1,false = 0))
data = data %>% mutate("1 reads" = if_else(condition = HPV33.reads == 1,true = 1,false = 0))
data = data %>% mutate("2 reads" = if_else(condition = HPV33.reads == 2,true = 1,false = 0))
data = data %>% mutate("3-23 reads" = if_else(condition = HPV33.reads >=3 &HPV33.reads <24,true = 1,false = 0))
data = data %>% mutate("24+ reads" = if_else(condition = HPV33.reads >=24,true = 1,false = 0))
data = colSums(data[,2:ncol(data)]) %>% as.data.frame()
names(data)[1] = "count"
data = rownames_to_column(data,var = "bin")
data
ggplot(data=data, aes(x=factor(bin,levels = c("0 reads","1 reads","2 reads","3-23 reads","24+ reads")), y=count)) +
geom_bar(stat="identity", fill="steelblue") + xlab("HPV Reads")+ theme_minimal()+
geom_text(aes(label=count), vjust=-0.5, color="black", size=3.5)
hpv33_positive = HPV33 %>% dplyr::mutate(hpv33_positive = case_when(reads >= 10 ~ "positive",
reads < 10 ~ "negative")
)
hpv33_positive$reads = NULL
hmsc_cancer_cells = AddMetaData(object = hmsc_cancer_cells,metadata = hpv33_positive)
DimPlot(object = hmsc_cancer_cells,group.by = c("hpv33_positive"),pt.size = 2)+
FeaturePlot(object = hmsc_cancer_cells,features = "MYB",pt.size = 2)
library(biomaRt)
ensembl = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl")
DefaultAssay(hmsc_cancer_cells) = "integrated"
library("biomaRt")
# mart <- useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")
# all_coding_genes <- getBM(attributes = c( "hgnc_symbol"), filters = c("biotype"), values = list(biotype="protein_coding"), mart = mart)
features = VariableFeatures(hmsc_cancer_cells)
features = hmsc_cancer_cells@assays$RNA@data %>% rowMeans() %>% sort(decreasing = T) %>% head(3000) %>% names()
# features = intersect(features, VariableFeatures(hmsc_cancer_cells) )
# features = intersect(features, all_coding_genes[,1] )
acc_deg <-
FindMarkers(
hmsc_cancer_cells,
ident.1 = "positive",
ident.2 = "negative",
features = features,
densify = T,
assay = "RNA",
test.use = "LR",
latent.vars = "plate",
logfc.threshold = 0.1,
min.pct = 0.1,
only.pos = F,
mean.fxn = function(x) {
return(log(rowMeans(x) + 1, base = 2)) # change func to calculate logFC in log space data (default to exponent data)
# return(log(rowMeans(expm1(x)) + 1, base = 2))
}
)
| | 0 % ~calculating
|+ | 1 % ~09s
|++ | 2 % ~08s
|++ | 3 % ~11s
|+++ | 4 % ~10s
|+++ | 5 % ~10s
|++++ | 6 % ~09s
|++++ | 7 % ~09s
|+++++ | 8 % ~09s
|+++++ | 9 % ~09s
|++++++ | 10% ~08s
|++++++ | 11% ~08s
|+++++++ | 12% ~08s
|+++++++ | 13% ~08s
|++++++++ | 14% ~08s
|++++++++ | 15% ~08s
|+++++++++ | 16% ~07s
|+++++++++ | 17% ~07s
|++++++++++ | 18% ~07s
|++++++++++ | 19% ~08s
|+++++++++++ | 20% ~07s
|+++++++++++ | 21% ~07s
|++++++++++++ | 22% ~07s
|++++++++++++ | 23% ~07s
|+++++++++++++ | 24% ~07s
|+++++++++++++ | 26% ~07s
|++++++++++++++ | 27% ~07s
|++++++++++++++ | 28% ~07s
|+++++++++++++++ | 29% ~06s
|+++++++++++++++ | 30% ~06s
|++++++++++++++++ | 31% ~06s
|++++++++++++++++ | 32% ~06s
|+++++++++++++++++ | 33% ~06s
|+++++++++++++++++ | 34% ~06s
|++++++++++++++++++ | 35% ~06s
|++++++++++++++++++ | 36% ~06s
|+++++++++++++++++++ | 37% ~06s
|+++++++++++++++++++ | 38% ~06s
|++++++++++++++++++++ | 39% ~06s
|++++++++++++++++++++ | 40% ~05s
|+++++++++++++++++++++ | 41% ~05s
|+++++++++++++++++++++ | 42% ~05s
|++++++++++++++++++++++ | 43% ~05s
|++++++++++++++++++++++ | 44% ~05s
|+++++++++++++++++++++++ | 45% ~05s
|+++++++++++++++++++++++ | 46% ~05s
|++++++++++++++++++++++++ | 47% ~05s
|++++++++++++++++++++++++ | 48% ~05s
|+++++++++++++++++++++++++ | 49% ~05s
|+++++++++++++++++++++++++ | 50% ~04s
|++++++++++++++++++++++++++ | 51% ~04s
|+++++++++++++++++++++++++++ | 52% ~04s
|+++++++++++++++++++++++++++ | 53% ~04s
|++++++++++++++++++++++++++++ | 54% ~04s
|++++++++++++++++++++++++++++ | 55% ~04s
|+++++++++++++++++++++++++++++ | 56% ~04s
|+++++++++++++++++++++++++++++ | 57% ~04s
|++++++++++++++++++++++++++++++ | 58% ~04s
|++++++++++++++++++++++++++++++ | 59% ~04s
|+++++++++++++++++++++++++++++++ | 60% ~04s
|+++++++++++++++++++++++++++++++ | 61% ~04s
|++++++++++++++++++++++++++++++++ | 62% ~03s
|++++++++++++++++++++++++++++++++ | 63% ~03s
|+++++++++++++++++++++++++++++++++ | 64% ~03s
|+++++++++++++++++++++++++++++++++ | 65% ~03s
|++++++++++++++++++++++++++++++++++ | 66% ~03s
|++++++++++++++++++++++++++++++++++ | 67% ~03s
|+++++++++++++++++++++++++++++++++++ | 68% ~03s
|+++++++++++++++++++++++++++++++++++ | 69% ~03s
|++++++++++++++++++++++++++++++++++++ | 70% ~03s
|++++++++++++++++++++++++++++++++++++ | 71% ~03s
|+++++++++++++++++++++++++++++++++++++ | 72% ~02s
|+++++++++++++++++++++++++++++++++++++ | 73% ~02s
|++++++++++++++++++++++++++++++++++++++ | 74% ~02s
|++++++++++++++++++++++++++++++++++++++ | 76% ~02s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~02s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~02s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~02s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~02s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~02s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~02s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~02s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=09s
acc_deg$fdr<-p.adjust(p = as.vector(acc_deg$p_val) ,method = "fdr" )
ranked_vec = acc_deg[,"avg_log2FC"]%>% setNames(rownames(acc_deg)) %>% na.omit() # make named vector
hyp_obj <-hypeR_fgsea(signature = ranked_vec,genesets = genesets,up_only = F)
hyp_dots(hyp_obj,merge = F)×–
acc_deg
acc_deg[c("MYB","JAG1"),]
NA
volcano plot log2(mean logTPM HPV+) - log2(mean logTPM HPV-)
volcano_plot(de_genes = acc_deg,fc_cutoff = 1.3, fdr_cutoff = 0.1,show_gene_names = c("MYB","JAG1"),ident1 = "HPV33 positive",ident2 = "HPV33 negative",top_genes_text = 5)
acc_deg2 = acc_deg %>% mutate(avg_log2FC = exp(avg_log2FC))
volcano_plot(de_genes = acc_deg2,fc_cutoff = 2**(1.3), fdr_cutoff = 0.1,show_gene_names = c("MYB","JAG1"),ident1 = "HPV33 positive",ident2 = "HPV33 negative",top_genes_text = 5)
volcano plot log2(mean logTPM HPV+) - log2(mean logTPM HPV-)
acc_deg <-
FindMarkers(
acc1_cancer_cells,
ident.1 = "positive",
ident.2 = "negative",
features = features,
densify = T,
assay = "RNA",
test.use = "LR",
latent.vars = "plate",
logfc.threshold = 0.35,
min.pct = 0.1,
mean.fxn = function(x) {
return(rowMeans(x) + 1) # change func to calculate logFC in log space data (default to exponent data)
}
)
acc_deg$fdr<-p.adjust(p = as.vector(acc_deg$p_val) ,method = "fdr" )
acc_deg[c("MYB","JAG1"),]
volcano plot (mean logTPM HPV+) - (mean logTPM HPV-)
volcano_plot(de_genes = acc_deg,fc_cutoff = 1.3, fdr_cutoff = 0.1,show_gene_names = c("MYB","JAG1"),ident1 = "HPV33 positive",ident2 = "HPV33 negative",top_genes_text = 5)+xlab("avg diff")
Warning in de_genes$delabel[up_genes] <- `*vtmp*` :
number of items to replace is not a multiple of replacement length
notch_genes = c("JAG1","JAG2","NOTCH3","NOTCH2","NOTCH1","DLL1","MYB","HES4","HEY1","HEY2","NRARP")
for (gene in notch_genes) {
myb_vs_hpv = FetchData(object = acc1_cancer_cells,vars = c("hpv33_positive",gene))
myb_vs_hpv$hpv33_positive = paste("HPV33",myb_vs_hpv$hpv33_positiv)
p = ggboxplot(myb_vs_hpv, x = "hpv33_positive", y = gene,
palette = "jco",
add = "jitter")+ stat_compare_means(method = "wilcox.test",comparisons = list(c("HPV33 positive","HPV33 negative")))+ stat_summary(fun.data = function(x) data.frame(y=max(x)*1.2, label = paste("Mean=",round(mean(x),digits = 2))), geom="text") +ylab("log2(gene)")+ggtitle(gene)
print_tab(p,title = gene)
}
## JAG1 {.unnumbered }
## JAG2 {.unnumbered }
## NOTCH3 {.unnumbered }
## NOTCH2 {.unnumbered }
## NOTCH1 {.unnumbered }
## DLL1 {.unnumbered }
## MYB {.unnumbered }
## HES4 {.unnumbered }
## HEY1 {.unnumbered }
## HEY2 {.unnumbered }
## NRARP {.unnumbered }
NA
notch_targets = c("NOTCH3","HES4","HEY1","HEY2","NRARP")
notch_ligand = c("JAG1","JAG2","DLL1")
notch_genes = list(notch_targets = notch_targets,notch_ligand = notch_ligand)
for (i in 1:length(notch_genes)) {
genes = notch_genes[[i]]
name = names( notch_genes)[i]
myb_vs_hpv = FetchData(object = acc1_cancer_cells,vars = c(genes)) %>% rowMeans()
myb_vs_hpv = myb_vs_hpv %>% cbind(FetchData(object = acc1_cancer_cells,vars = c("hpv33_positive")))
colnames(myb_vs_hpv)[1] = "gene_set"
myb_vs_hpv$hpv33_positive = paste("HPV33",myb_vs_hpv$hpv33_positiv)
p = ggboxplot(myb_vs_hpv, x = "hpv33_positive", y = "gene_set",
palette = "jco",
add = "jitter")+ stat_compare_means(method = "wilcox.test",comparisons = list(c("HPV33 positive","HPV33 negative")))+ stat_summary(fun.data = function(x) data.frame(y=max(x)*1.2, label = paste("Mean=",round(mean(x),digits = 2))), geom="text") +ylab("log2(gene)")+ggtitle(name)
print(p)
}
cor_data = FetchData(object = acc1_cancer_cells,vars = c("MYB","myo_score"))
ggplot(cor_data, aes(x=MYB, y=myo_score)) +
stat_cor(method = "pearson")+
geom_smooth(method=lm) +
geom_point()
cor_data = FetchData(object = acc1_cancer_cells,vars = c("JAG1","myo_score"))
ggplot(cor_data, aes(x=JAG1, y=myo_score)) +
stat_cor(method = "pearson")+
geom_smooth(method=lm) +
geom_point()
cor_data = FetchData(object = acc1_cancer_cells,vars = c("JAG2","myo_score"))
ggplot(cor_data, aes(x=JAG2, y=myo_score)) +
stat_cor(method = "pearson")+
geom_smooth(method=lm) +
geom_point()
cor_data = FetchData(object = acc1_cancer_cells,vars = c("DLL1","myo_score"))
ggplot(cor_data, aes(x=DLL1, y=myo_score)) +
stat_cor(method = "pearson")+
geom_smooth(method=lm) +
geom_point()
cor_data = FetchData(object = acc1_cancer_cells,vars = notch_targets) %>% rowMeans()
cor_data = cor_data %>% cbind(FetchData(object = acc1_cancer_cells,vars = c("myo_score")))
colnames(cor_data)[1] = "notch_targets"
ggplot(cor_data, aes(x=notch_targets, y=myo_score)) +
stat_cor(method = "pearson")+
geom_smooth(method=lm) +
geom_point()
cor_data = FetchData(object = acc1_cancer_cells,vars = notch_ligand) %>% rowMeans()
cor_data = cor_data %>% cbind(FetchData(object = acc1_cancer_cells,vars = c("myo_score")))
colnames(cor_data)[1] = "notch_ligand"
ggplot(cor_data, aes(x=notch_ligand, y=myo_score)) +
stat_cor(method = "pearson")+
geom_smooth(method=lm) +
geom_point()
notch_score = FetchData(object = all_acc_cancer_cells,vars = notch_targets) %>% rowMeans()
all_acc_cancer_cells = AddMetaData(object = all_acc_cancer_cells,metadata = notch_score,col.name = "notch_score")
FeaturePlot(object = all_acc_cancer_cells,features = "notch_score" )
myo_markers = c("TP63", "TP73", "KRT14", "CDH3")
score = FetchData(object = acc1_cancer_cells,vars = myo_markers) %>% rowMeans()
acc1_cancer_cells = AddMetaData(object = acc1_cancer_cells,metadata = score,col.name = "myo_markers_score")
FeaturePlot(object = acc1_cancer_cells,features = "myo_markers_score",pt.size = 2 )
markers = c("CLDN3", "ANXA8", "EHF", "KIT")
score = FetchData(object = acc1_cancer_cells,vars = markers) %>% rowMeans()
acc1_cancer_cells = AddMetaData(object = acc1_cancer_cells,metadata = score,col.name = "lum_markers_score")
FeaturePlot(object = acc1_cancer_cells,features = "lum_markers_score" ,pt.size = 2 )